Loading and tidying the Yelp business dataset

business <- read_csv("./data/business.csv") %>% 
  mutate(name = str_sub(name, 2, -2)) %>% 
  mutate(address = str_sub(address, 2, -2)) %>% 
  filter(neighborhood != "Downtown Tampa" & neighborhood != "North Valley")
## Parsed with column specification:
## cols(
##   business_id = col_character(),
##   name = col_character(),
##   neighborhood = col_character(),
##   address = col_character(),
##   city = col_character(),
##   state = col_character(),
##   postal_code = col_integer(),
##   latitude = col_double(),
##   longitude = col_double(),
##   stars = col_double(),
##   review_count = col_integer(),
##   is_open = col_integer(),
##   categories = col_character()
## )
categories <- business %>% 
  select(business_id, categories) %>% 
  separate(categories, into = c("cat1", "cat2", "cat3", "cat4", "cat5", "cat6", "cat7", "cat8", "cat9", "cat10", "cat11", "cat12", "cat13", "cat14", "cat15", "cat16", "cat17", "cat18", "cat19", "cat20", "cat21", "cat22", "cat23", "cat24", "cat25"), sep  = ";") %>% 
  gather(key = "cat_id", value = "category", starts_with("cat")) %>% 
  select(-cat_id) 
## Warning: Expected 25 pieces. Additional pieces discarded in 1 rows [5602].
## Warning: Expected 25 pieces. Missing pieces filled with `NA` in 21884
## rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
## 20, ...].
restaurant_ids <- categories %>% 
  filter(category == "Restaurants") %>% 
  distinct(business_id)

restaurants <- business %>% 
  left_join(restaurant_ids, by = "business_id") %>% 
  select(-categories)

rm(business)

Loading and tidying the attributes dataset

attributes <- read_csv("./data/attributes.csv") %>% 
  janitor::clean_names() %>% 
  select(business_id, alcohol)
## Parsed with column specification:
## cols(
##   .default = col_character()
## )
## See spec(...) for full column specifications.

Joining the business and attributes datasets

restaurants <- restaurants %>% 
  left_join(attributes, by = "business_id")

Creating a plotly of restaurant locations

Center of Las Vegas: 36.1699° N, 115.1398° W. Plotly displayed is surrounds this center by 0.5 degrees longitude and latitude.

restaurants %>% 
  filter(latitude > 35.6699 & latitude < 36.6699) %>% 
  filter(longitude < -114.6398 & longitude > -115.6398) %>%
  plot_ly(x = ~longitude, y = ~latitude, type = "scatter", mode = "markers",
          alpha = 0.5, 
          color = ~stars, hoverinfo = 'text',
        text = ~paste(name, " @", neighborhood, "\n", address, "\n", city, ", ", state, postal_code, "\n", stars, "stars on Yelp")) %>%
  layout(xaxis = list(title = "Longitude"),
         yaxis = list(title = "Latitude"))
<<<<<<< HEAD
=======
>>>>>>> b3016a79d0174192eca54e532ba8f779bfb4cbc4
exploratory stuff
restaurants %>% 
  group_by(neighborhood) %>% 
<<<<<<< HEAD
  count()
## # A tibble: 16 x 2
## # Groups:   neighborhood [16]
##    neighborhood        n
##    <chr>           <int>
##  1 Anthem             75
##  2 Centennial        815
##  3 Chinatown         834
##  4 Downtown         1837
##  5 Eastside         1886
##  6 Northwest        1044
##  7 South Summerlin   351
##  8 Southeast        2999
##  9 Southwest        1083
## 10 Spring Valley    2710
## 11 Summerlin         896
## 12 Sunrise           704
## 13 The Lakes         162
## 14 The Strip        2734
## 15 University        261
## 16 Westside         3494
======= count() %>% knitr::kable()
neighborhood n
Anthem 75
Centennial 815
Chinatown 834
Downtown 1837
Eastside 1886
Northwest 1044
South Summerlin 351
Southeast 2999
Southwest 1083
Spring Valley 2710
Summerlin 896
Sunrise 704
The Lakes 162
The Strip 2734
University 261
Westside 3494
>>>>>>> b3016a79d0174192eca54e532ba8f779bfb4cbc4
restaurants %>%
  filter(alcohol == "True" | is_open == "True") %>% 
  group_by(neighborhood) %>% 
  count() %>% 
  knitr::kable()
neighborhood n
Anthem 3
Centennial 46
Chinatown 23
Downtown 99
Eastside 66
Northwest 41
South Summerlin 45
Southeast 171
Southwest 41
Spring Valley 85
Summerlin 39
Sunrise 30
The Lakes 4
The Strip 67
University 12
Westside 162
<<<<<<< HEAD
  restaurants %>%
=======
restaurants %>%
>>>>>>> b3016a79d0174192eca54e532ba8f779bfb4cbc4
  filter(alcohol == "False" | is_open == "True") %>% 
  group_by(neighborhood) %>% 
  count() %>% 
  knitr::kable()
neighborhood n
Anthem 2
Centennial 14
Chinatown 16
Downtown 44
Eastside 32
Northwest 15
South Summerlin 9
Southeast 87
Southwest 21
Spring Valley 35
Summerlin 11
Sunrise 15
The Lakes 3
The Strip 168
University 2
Westside 74
restaurants %>% 
select(business_id, neighborhood, stars) %>% 
  distinct() %>% 
  group_by(neighborhood, stars) %>% 
  tally %>% 
  rename(my_count = n) %>% 
  spread(key = stars, value = my_count) %>% 
  knitr::kable()
neighborhood 1 1.5 2 2.5 3 3.5 4 4.5 5
Anthem NA 1 8 4 9 10 10 16 17
Centennial 14 18 47 77 113 138 142 101 165
Chinatown 24 11 40 60 94 168 184 141 112
Downtown 44 53 91 126 214 284 351 309 365
Eastside 64 64 139 198 268 315 367 229 242
Northwest 21 17 61 90 113 183 195 132 232
South Summerlin 1 2 10 22 44 84 75 54 59
Southeast 78 83 165 256 377 474 543 431 592
Southwest 13 22 49 89 114 166 204 185 241
Spring Valley 41 46 110 193 270 392 460 478 720
Summerlin 17 11 43 62 106 128 157 127 245
Sunrise 28 33 64 90 89 112 109 86 93
The Lakes 1 3 9 10 12 20 36 23 48
The Strip 19 37 117 277 448 596 629 383 228
University 5 10 15 24 30 57 48 45 27
Westside 60 81 156 278 369 542 624 550 834
restaurants %>%
  mutate(review_count = as.numeric(review_count)) %>% 
  select(business_id, neighborhood, review_count) %>% 
  group_by(neighborhood) %>% 
  summarise(Average_Number_of_Reviews = mean(review_count)) %>% 
  knitr::kable()
neighborhood Average_Number_of_Reviews
Anthem 42.76000
Centennial 42.26012
Chinatown 91.99400
Downtown 55.80512
Eastside 55.82927
Northwest 31.30843
South Summerlin 72.31909
Southeast 46.21407
Southwest 57.39797
Spring Valley 47.95314
Summerlin 34.29018
Sunrise 16.83097
The Lakes 35.55556
The Strip 182.80102
University 56.02682
Westside 40.56898
<<<<<<< HEAD =======
>>>>>>> b3016a79d0174192eca54e532ba8f779bfb4cbc4

Creating a plotly of open and closed restaurant compared to their rating on Yelp

restaurants %>% 
  mutate(stars = if_else(stars == 1, "1",
                         if_else(stars == 1.5, "1.5",
                                 if_else(stars == 2, "2",
                                         if_else(stars == 2.5, "2.5",
                                                 if_else(stars == 3, "3",
                                                         if_else(stars == 4, "4",
                                                                 if_else(stars == 4.5, "4.5", "5"))))))),
         review_count = as.numeric(review_count)) %>% 
  group_by(stars) %>% 
  plot_ly(x = ~stars, y = ~review_count, color = ~stars, type = "bar", colors = "Set3") %>% 
  layout(xaxis = list(title = "Stars"),
         yaxis = list(title = "Number of Reviews"))
Plots of Restaurants
popular <- categories %>% 
  filter(category == "Restaurants" | category == "Food") %>% 
  distinct(business_id) %>% 
  left_join(categories, by = "business_id") %>% 
  filter(category %in% c("Bars", "Breakfast & Brunch", "Chinese", "Italian", "Mexican", "Chicken Wings", "Salad", "Sushi Bars", "Pizza", "Steakhouses"))

restaurants %>% 
  select(business_id, neighborhood) %>% 
  inner_join(popular) %>% 
  distinct() %>% 
  group_by(neighborhood, category) %>% 
  tally() %>% 
  plotly::plot_ly(x = ~neighborhood, y = ~n, type = 'bar', color = ~category, hoverinfo = 'text',
        text = ~paste(neighborhood, " has ",
                      n, " ", category, " restaurants.")) %>%
  layout(yaxis = list(title = "Restaurants"), xaxis = list(title = "", tickangle = -45), barmode = 'stack')
## Joining, by = "business_id"
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
<<<<<<< HEAD

i guess another plot? we shall see

Plots of Restaurants
popular <- categories %>% 
  filter(category == "Restaurants" | category == "Food") %>% 
  distinct(business_id) %>% 
  left_join(categories, by = "business_id") %>% 
  filter(category %in% c("Bars", "Breakfast & Brunch", "Chinese", "Italian", "Mexican", "Chicken Wings", "Salad", "Sushi Bars", "Pizza", "Steakhouses"))

restaurants %>% 
  select(business_id, neighborhood) %>% 
  inner_join(popular) %>% 
  distinct() %>% 
  group_by(neighborhood, category) %>% 
  tally() %>% 
  plotly::plot_ly(x = ~neighborhood, y = ~n, type = 'bar', color = ~category, hoverinfo = 'text',
        text = ~paste(neighborhood, " has ",
                      n, " ", category, " restaurants.")) %>%
  layout(yaxis = list(title = "Restaurants"), xaxis = list(title = "", tickangle = -45), barmode = 'stack')
## Joining, by = "business_id"
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

Geographic Plot by Categories

restaurants %>% 
  inner_join(popular) %>% 
  plot_ly(x = ~longitude, y = ~latitude, type = "scatter", mode = "markers",
          alpha = 0.9, 
          color = ~category, hoverinfo = 'text',
        text = ~paste(name, " @", neighborhood, "\n", address, "\n", city, ", ", state, postal_code, "\n", stars, "star", category, "on Yelp.")) %>%
  layout(xaxis = list(title = "Longitude"),
         yaxis = list(title = "Latitude"))
## Joining, by = "business_id"
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
======= >>>>>>> b3016a79d0174192eca54e532ba8f779bfb4cbc4